#!/usr/bin/python3

# This file is part of Cockpit.
#
# Copyright (C) 2016 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.

import subprocess
import time
import re

import parent
from testlib import *


def read_mem_info(machine):
    """Return the contents of /proc/meminfo on *machine* as a dict.

    Keys are the field names as they appear before the colon.  Values are
    integers: fields carrying a "kB" suffix are multiplied by 1024, all
    other fields are taken as plain numbers.
    """
    def to_number(raw):
        # "kB" fields are scaled by 1024; int() tolerates the padding spaces
        if raw.endswith("kB"):
            return int(raw[:-2]) * 1024
        return int(raw)

    output = machine.execute("cat /proc/meminfo")
    fields = (line.strip().split(":") for line in output.splitlines())
    return {name: to_number(raw) for (name, raw) in fields}


class TestMetrics(MachineCase):
    """Checks of the system graphs page and of the usage bars on the overview page."""

    # Forward port 2000 of the test machine; check_host_metrics() pushes
    # data to it from the host to generate network traffic.
    provision = {
        "0": {"forward": {"2000": 2000}},
    }

    def check_host_metrics(self):
        """Generate load on the machine and check that the graphs reflect it.

        Shared by the test methods of this class, which open /system/graphs
        before calling it.  In turn it checks the CPU, memory, disk I/O and
        network traffic plots, and finally opens the individual CPU and
        memory graph pages and checks that they are rendered.
        """
        b = self.browser
        m = self.machine

        # Timestamp of the newest sample of the first series of a plot.
        b.eval_js("""
          ph_plot_timestamp = function (sel) {
            var data = $(sel).data("flot_data")[0]['data'];
            return data[data.length-1][0];
          }
        """)

        # True once the newest sample of the plot has reached timestamp "ts".
        b.eval_js("""
          ph_plot_timestamp_is = function (sel, ts) {
            return ph_plot_timestamp(sel, 0) >= ts;
          }
        """)

        def wait_for_plot(sel, seconds):
            # Wait until the plot has gained `seconds` worth of new samples
            # (the plot timestamps are treated as milliseconds here).
            now = b.call_js_func("ph_plot_timestamp", sel)
            b.wait_js_func("ph_plot_timestamp_is", sel, now + seconds * 1000)

        # When checking whether the plots show the expected results,
        # we look for a segment of the data of a certain duration
        # whose average is in a certain range.  Otherwise any short
        # outlier will make us miss the expected plateau.  Such
        # outliers happen frequently with the CPU plot.
        #
        # "min" or "max" may be null to leave that side unbounded.  The
        # "label" argument is accepted but not used by the function.

        b.eval_js("""
          ph_plateau = function (data, min, max, duration, label) {
              var i, j;
              var sum;  // sum of data[i..j]
              var avg;  // average of data[i..j]

              sum = 0;
              i = 0;
              for (j = 0; j < data.length; j++) {
                  sum += data[j][1];
                  while (i < j && (data[j][0] - data[i][0]) > duration * 1000) {
                      avg = sum / (j - i + 1);
                      if ((min === null || avg >= min) && (max === null || avg <= max))
                          return true;
                      sum -= data[i][1];
                      i++;
                  }
              }
            return false;
          }
        """)

        b.eval_js("""
          ph_flot_data_plateau = function (sel, min, max, duration, label) {
            return ph_plateau($(sel).data("flot_data")[0]['data'], min, max, duration, label);
          }
        """)

        # CPU.  We have to measure the actual percentage independently
        # and check whether the plot matches it.  This is because we
        # can not reliably create 100% load in a virtual machine since
        # we only get as much as the qemu process itself gets.
        if m.image not in ["fedora-coreos"]:  # no python there
            m.upload(["verify/files/measure-cpu"], "/var/lib/testvm/")
            cmd = "$(which python3 || which python2) /var/lib/testvm/measure-cpu"

            cpu_start = m.execute(cmd + " start")
            cpu_pid = m.spawn("while true; do true; done", "cpu-load.log")
            wait_for_plot("#server_cpu_graph", 20)
            m.execute("kill %d" % cpu_pid)
            cpu_percentage = float(m.execute("%s stop '%s'" % (cmd, cpu_start)))

            print("Measured CPU", cpu_percentage)
            # accept a 15 second plateau within +/- 50% of the measured value
            self.assertTrue(b.call_js_func("ph_flot_data_plateau", "#server_cpu_graph",
                                           cpu_percentage * 0.50, cpu_percentage * 1.50, 15, "cpu"))

        # Memory.  We assume that the machine has had a reasonably
        # stable memory situation for the last 20 seconds and we just
        # check whether we see that in the plot.
        #
        meminfo = read_mem_info(m)
        mem_used = meminfo['MemTotal'] - meminfo['MemAvailable']
        print("Measured Memory", mem_used)
        b.wait_js_func("ph_flot_data_plateau", "#server_memory_graph", mem_used * 0.95, mem_used * 1.05, 15, "mem")

        # Disk.  Anything above 100 kiB/s is good for now.
        #
        m.add_disk("10M", serial="MYSERIAL")
        disk_pid = m.spawn("while true; do dd if=/dev/sda of=/dev/sda; done", "load-disk.log")
        b.wait_js_func("ph_flot_data_plateau", "#server_disk_io_graph", 100 * 1024, None, 5, "disk io")
        m.execute("kill %d" % disk_pid)

        # Network traffic through Ethernet.  Anything above 300 kb/s is good for now.
        #
        ifaces = m.execute(r"ip -o a | awk '{print $2}'")
        iface = next((i for i in ifaces.split('\n') if i.startswith(("eth", "ens"))), None)
        if not iface:
            raise Exception("Couldn't find interface to sample")

        m.execute("if firewall-cmd --state >/dev/null 2>&1; then firewall-cmd --permanent --zone=public --add-port=2000/tcp && firewall-cmd --reload; fi")

        # Prints two numbers: rx + tx bytes of the sampled interface, then of lo.
        # The same anchored pattern is used before and after the transfer so that
        # an interface whose name merely contains `iface` can not add extra lines.
        counters_cmd = (r"grep -E '\b" + iface + r":' /proc/net/dev | awk '{print $2 + $10}'; "
                        r"grep '\blo:' /proc/net/dev | awk '{print $2 + $10}'")

        (eth_before, lo_before) = m.execute(counters_cmd).strip().split()
        time_before = time.time()

        net_pid = m.spawn("socat TCP-LISTEN:2000 PIPE", "load-net.log")

        # might take several attempts until server-side is listening
        (host, port) = m.forward["2000"].split(":")
        client = subprocess.Popen(
            "for retry in `seq 10`; do nc {0} {1} </dev/zero >/dev/null; sleep 1; done".format(host, port), shell=True)

        try:
            b.wait_js_func("ph_flot_data_plateau", "#server_network_traffic_graph", 300 * 1000, None, 5, "net io")
        finally:
            # Report the independently measured traffic even when the wait
            # failed; the inner finally makes sure the client process is
            # stopped even if taking that measurement fails as well.
            try:
                time_after = time.time()
                (eth_after, lo_after) = m.execute(counters_cmd).strip().split()
                print("Measured {0:.1f}s of network traffic; {3}: {1} bytes, lo: {2} bytes".format(
                    time_after - time_before, int(eth_after) - int(eth_before), int(lo_after) - int(lo_before), iface))
            finally:
                client.terminate()
                client.wait()

        m.execute("kill %d" % net_pid)

        if m.image in ["rhel-8-2-distropkg"]:
            return  # HACK: individual graph pages broken, fixed in PR #13438

        # JS snippets returning the rendered size of the element matching a selector
        el_width = "(function (sel) { return ph_find(sel).offsetWidth; })"
        el_height = "(function (sel) { return ph_find(sel).offsetHeight; })"

        # CPU graph page
        b.click("#link-cpu")
        b.wait_not_visible(".ct-graph-memory")
        b.wait_visible("#cpu_status_title")
        b.wait_visible("#cpu_status_graph")
        self.assertGreater(b.call_js_func(el_width, "#cpu_status_graph"), 500)
        self.assertGreater(b.call_js_func(el_height, "#cpu_status_graph"), 300)
        b.wait_visible(".server-graph-legend .cpu-kernel")

        # go back to graphs overview
        b.click("#graphs-link")
        b.wait_visible(".ct-graph-memory")
        b.wait_visible(".ct-graph-cpu")

        # memory graph page
        if m.image in ["fedora-coreos", "ubuntu-1804", "ubuntu-stable", "debian-stable", "debian-testing"]:
            # no swap on these
            b.wait_not_visible("#link-memory-and-swap")
            b.click("#link-memory")
        else:
            b.wait_not_visible("#link-memory")
            b.click("#link-memory-and-swap")
        b.wait_not_visible(".ct-graph-cpu")
        b.wait_visible("#memory_status_title")
        b.wait_visible("#memory_status_graph")
        self.assertGreater(b.call_js_func(el_width, "#memory_status_graph"), 500)
        self.assertGreater(b.call_js_func(el_height, "#memory_status_graph"), 300)
        b.wait_visible(".server-graph-legend .memory-used")

    # Graphs with PCP available: after the checks a cockpit-pcp process
    # must exist (pgrep presumably fails the command, and thus the test,
    # when there is none).
    @skipImage("No PCP available", "fedora-coreos")
    def testPcp(self):
        m = self.machine

        self.login_and_go("/system/graphs")

        self.check_host_metrics()
        m.execute("pgrep cockpit-pcp")

    # Graphs without PCP: the cockpit-pcp executable is removed from both
    # known locations before logging in, and no such process may be
    # running after the checks.
    def testInternal(self):
        m = self.machine

        m.execute("! [ -f /usr/libexec/cockpit-pcp ] || rm /usr/libexec/cockpit-pcp")
        m.execute("! [ -f /usr/lib/cockpit/cockpit-pcp ] || rm /usr/lib/cockpit/cockpit-pcp")

        self.login_and_go("/system/graphs")

        self.check_host_metrics()
        m.execute("! pgrep cockpit-pcp")

    # Usage bars on the overview page: row 1 is checked against CPU load,
    # row 2 against memory consumption.
    def testOverview(self):
        m = self.machine
        b = self.browser

        # packagekit often eats a lot of CPU; silence it to not screw up the "system is idle" test
        m.execute("systemctl mask packagekit")

        def progressValue(number):
            # Percentage shown by the progress bar in the given row of the
            # usage table, parsed from the "width: N%;" part of its style.
            sel = ".system-usage tr:nth-child(%i) .pf-c-progress__indicator" % number
            b.wait_present(sel)
            b.wait_attr_contains(sel, "style", "width:")
            style = b.attr(sel, "style")
            match = re.search(r"width: (\d+)%;", style)
            return int(match.group(1))

        self.login_and_go("/system")

        # CPU
        # first wait until system settles down
        wait(lambda: progressValue(1) < 20)
        # one busy process per CPU
        m.spawn("for i in $(seq $(nproc)); do cat /dev/urandom > /dev/null & done", "cpu_hog.log")
        wait(lambda: progressValue(1) > 90)
        m.execute("pkill -e -f cat.*urandom")
        # should go back to idle usage
        wait(lambda: progressValue(1) < 20)

        # memory: our test machines have ~ 1 GiB of memory, a reasonable chunk of it should be used
        b.wait_in_text(".system-usage tr:nth-child(2)", " GiB")
        initial_usage = progressValue(2)
        self.assertGreater(initial_usage, 10)
        self.assertLess(initial_usage, 80)
        # allocate an extra 300 MB; this may cause other stuff to get unmapped,
        # thus not exact addition, but usage should go up
        mem_hog = m.spawn("MEMBLOB=$(yes | dd bs=1M count=300); touch /tmp/hogged; sleep infinity", "mem_hog.log")
        m.execute("while [ ! -e /tmp/hogged ]; do sleep 1; done")
        # bars update every 5s
        time.sleep(8)
        hog_usage = progressValue(2)
        self.assertGreater(hog_usage, initial_usage + 10)

        m.execute("kill %d" % mem_hog)
        # should go back to initial_usage; often below, due to paged out stuff
        wait(lambda: progressValue(2) <= initial_usage)
        self.assertGreater(progressValue(2), 10)


# Run the tests when this file is executed as a script.  test_main is not
# defined in this file; it comes in through the wildcard import from testlib.
if __name__ == '__main__':
    test_main()
